# Linear Models with Outliers: Choosing between Conditional-Mean and Conditional-Median Methods
# State Politics & Policy Quarterly 11(4).
# Jeffrey J. Harden (jjharden@unc.edu) and Bruce A. Desmarais (desmarais@polsci.umass.edu)
#
# Simulation file
# This file replicates the simulation results with N = 50
###############################################################################

library(quantreg)
library(MASS)
library(normalp)

set.seed(111007)

### Functions for the Computation of Vuong and the Cross-Validated Johnson's t-test with OLS and MR ###

## Compute Skewness ##

mu3hat <- function(x){
	n <- length(x)
	ns <- n*1/(n-1)*1/(n-2)
	ns*sum((x-mean(x))^3)
}

## Johnson's t ##

johnsons.t <- function(x){
	m3 <- mu3hat(x)
	s <- sd(x)
	n <- length(x)
	(mean(x)+m3/(6*s^2*n)+m3/(3*s^4)*mean(x)^2)*sqrt(n)/s
}

## Centered Laplace Density ##

dlapl <- function(x,b){
	return(1/(2*b)*exp(-abs(x/b)))
}

desingular <- function(x,y){
	inv <- try(solve(t(x)%*%x))
	if(!is.matrix(inv)){
		sdy <- sd(y)
		for(i in 1:ncol(x)){
			sdx <- sd(x[,i])
			if(sdx > 0) x[,i] <- x[,i]+rnorm(length(x[,i]),sd=sdx/10000)
			if(sdx == 0) x[,i] <- x[,i]+rnorm(length(x[,i]),sd=sdy/10000)
		}
	}
	return(x)
}

## lls for MR and OLS ##

ll2 <- function(formula,data){
	ls <- lm(formula,data=data)
	mr <- rq(formula,data=data)
	sig <- summary(ls)$sigma
	b <- mean(abs(residuals(mr)))
	ll_ls <- dnorm(residuals(ls),sd=sig,log=T)
	ll_mr <- log(dlapl(residuals(mr),b=b))
	return(list(LS=ll_ls,MR=ll_mr))
}

## cvlls for MR and OLS ##

cvll2 <- function(formula,data){
	require(MASS)
	require(quantreg)
	est <- lm(formula,data=data,x=T,y=T)
	x <- est$x
	y <- est$y
	cvll.ls <- numeric(length(y))
	cvll.mr <- numeric(length(y))
	for(i in 1:length(y)){
		yt <- y[-i]
		xt <- desingular(x[-i,],yt)
		yv <- y[i]
		xv <- x[i,]
		ls <- lm(yt~-1+xt)
		mr <- rq(yt~-1+xt)
		sig <- summary(ls)$sigma
		b <- mean(abs(residuals(mr)))
		cvll.ls[i] <- dnorm(yv-rbind(xv)%*%coef(ls),sd=sig,log=T)
		cvll.mr[i] <- log(dlapl(yv-rbind(xv)%*%coef(mr),b=b))
	}
	return(list(LS=cvll.ls,MR=cvll.mr))
}

## The function for computing the CVJT ##
# Takes formula and data frame arguments
# Returns cvjt and vuong -- respective t-statistics, can be used as t or z stats.
# both tests are fit(OLS)-fit(MR), such that negative values suport MR

CVDM <- function(formula,data){
	require(quantreg)
	lls <- ll2(formula,data)
	cvlls <- cvll2(formula,data)
	lld <- lls[[1]]-lls[[2]]
	cvlld <- cvlls[[1]]-cvlls[[2]]
	return(list(cvjt = johnsons.t(cvlld),vuong=t.test(lld)$statistic))
}


sims <- 1000
n <- 50
p <- seq(1, 2, length = 50) 
vuongs <- numeric(sims)
cvjts <- numeric(sims)            
beta.ls <- matrix(NA,sims,2)
beta.mr <- matrix(NA,sims,2)
mse.ols <- numeric(length(p))
mse.mr <- numeric(length(p))
cvdm <- numeric(length(p))
vuong <- numeric(length(p))
cvdm.s <- numeric(length(p))
vuong.s <- numeric(length(p))
x <- rnorm(n)
b <- 1


for(j in 1:length(p)){
gc()

for(i in 1:sims){
	y <- b*x + rnormp(n, p = p[j])
  tests <- CVDM(y~x,data.frame(y,x))
	vuongs[i] <- tests[[2]]
	cvjts[i] <- tests[[1]]
	beta.ls[i,] <- coef(lm(y~x))
	beta.mr[i,] <- coef(rq(y~x))	
	cat("df loop =", j, "iteration =", i, "\n")
}
mse.ols[j] <- mean((beta.ls[ , 2] - b)^2)
mse.mr[j] <- mean((beta.mr[ , 2] - b)^2)
cvdm[j] <- median(cvjts)
cvdm.s[j] <- length(subset(cvjts, cvjts > 0))/sims
vuong[j] <- median(vuongs) 
vuong.s[j] <- length(subset(vuongs, vuongs > 0))/sims
}

save.image("n50.RData")
